#delimit ;

*Build the combined 2022 vote history file;
*Step 1 - read the general election CSV and store it as a Stata dataset;
import delimited using "Y2022_Gen_Vote_History.csv", encoding(ISO-8859-1) clear;
save "Y2022_Gen_Vote_History.dta", replace;

*Step 2 - read the runoff CSV and attach the general election records;
*Records are matched one to one on reg_id and the merge indicator is not kept;
import delimited using "Y2022_RO_Vote_History.csv", encoding(ISO-8859-1) clear;
merge 1:1 reg_id using "Y2022_Gen_Vote_History.dta", nogenerate;

*Step 3 - store the combined history for the later merge onto the voter file;
save "2022_Vote_History.dta", replace;


clear all;

*Load the voter file that the rest of this script works on;
use "voter_file_hist_all_imp.dta";

*Age cleaning;
*The two values 220 and 120 are recoded to 20;
*Ages of 17 or below and ages of 105 or above are set to missing;
replace age = 20 if age == 220;
replace age = 20 if age == 120;
replace age = . if age <= 17;
replace age = . if age >= 105;

*Age groups 1 to 7 covering 18-24 25-34 35-44 45-54 55-64 65-74 and 75 plus;
gen age_gr = 1 if age >= 18 & age <= 24;
replace age_gr = 2 if age >= 25 & age <= 34;
replace age_gr = 3 if age >= 35 & age <= 44;
replace age_gr = 4 if age >= 45 & age <= 54;
replace age_gr = 5 if age >= 55 & age <= 64;
replace age_gr = 6 if age >= 65 & age <= 74;
*Stata treats a missing value as larger than any number so age >= 75 alone would also match missing ages;
*The explicit missing check keeps age_gr missing whenever age is missing;
replace age_gr = 7 if age >= 75 & !missing(age);

*One indicator variable per category of race_cat_vf named race_inds1 race_inds2 and so on;
tab race_cat_vf, gen(race_inds);

*Attach the 2022 vote history by reg_id;
*NOTE(review) - records found in only one of the two files are kept and the merge indicator stays in the data - confirm this is intended;
merge 1:1 reg_id using "2022_Vote_History.dta";

*Generating binary turnout variables for 2022 Cycle;
*An empty vote string is coded 0 and any non-empty string is coded 1;
gen to_2022_gen  = . ;
replace to_2022_gen = 0 if y2022_003_vote == "";
replace to_2022_gen = 1 if y2022_003_vote != "";

gen to_2022_ro  = . ;
replace to_2022_ro = 0 if y2022_004_vote == "";
replace to_2022_ro = 1 if y2022_004_vote != "";

*Frequency checks on the two new indicators;
tab to_2022_ro;
tab to_2022_gen;






#delimit cr
program define pe
	version 12.0
	* Print-and-execute helper.
	* The full argument string is shown as text, run as a command,
	* and followed by one blank line. Called with no arguments it does nothing.
	if `"`0'"' == "" {
		exit
	}
	display as text `"`0'"'
	`0'
	display ""
end


#delimit;



*Coding year of registration;
*The year is read from the first four characters of the date once it is held as a string;
tostring registration_date, replace;
gen registration_year = substr(registration_date, 1, 4);
destring registration_year, replace;

*Cleaning error in one registration date;
*Dates whose parsed year is after 2020 are listed and the year parsed as 5201 is recoded to 2010;
tab registration_date if registration_year > 2020;
replace registration_year = 2010 if registration_year == 5201;
drop registration_date;

*Creating unique precinct IDs;
egen precinct_ID = group(county_precinct_id county_code);

*Generating binary turnout variables;
#delimit cr
* Each election is named year_type. The type selects the election code used in
* the vote-history variable name: gen -> 003, prim -> 001, ro -> 004.
* Turnout is 0 when the vote field equals "NV" and 1 otherwise. It is left
* missing for anyone whose registration year is after the election year or is missing.
local code_gen  003
local code_prim 001
local code_ro   004
foreach el in 2021_ro 2020_gen 2020_prim 2018_ro 2018_gen 2018_prim 2016_gen 2014_gen {
	local yr   = substr("`el'", 1, 4)
	local kind = substr("`el'", 6, .)
	local code `code_`kind''
	gen to_`el' = (y`yr'_`code'_vote != "NV") if registration_year <= `yr'
}

* 2020 primary party indicators. The party codes carry a trailing space in the data.
gen rep_prim_2020 = (y2020_001_party == "R ")
gen dem_prim_2020 = (y2020_001_party == "D ")

* Flag for registrants with a turnout value in all four generals from 2014 to 2020.
gen in_balanced_panel = 1 if !missing(to_2014_gen, to_2016_gen, to_2018_gen, to_2020_gen)
#delimit ;

*Flag for registrants who voted in the 2016 and 2020 generals and in none of the other listed elections;
gen only_2016g_2020g = 1
	if to_2020_gen == 1 & to_2016_gen == 1
	& to_2020_prim == 0 & to_2018_ro == 0 & to_2018_gen == 0 & to_2014_gen == 0;

*Copy of the approval indicator that is forced to 0 for 2020 Democratic primary voters;
gen nd_pres_app_bin_ga_rv_extend = cond(dem_prim_2020 == 1, 0, pres_app_bin_ga_rv_extend);

label variable nd_pres_app_bin_ga_rv_extend "Trump Approver";
label variable pres_app_bin_ga_rv_extend "Trump Approver";





#delimit ;
*Reshaping for Fixed Effects Regressions;
*Only the variables needed downstream are kept before going long;
keep
	age pres_app_bin_ga_rv_extend nd_pres_app_bin_ga_rv_extend in_balanced_panel
	rep_prim_2020 dem_prim_2020 only_2016g_2020g
	reg_id county_code precinct_ID
	to_2021_ro to_2022_ro to_2022_gen to_2020_gen to_2020_prim
	to_2018_ro to_2018_gen to_2018_prim to_2016_gen to_2014_gen
	y2020_001_party y2016_010_party female race_ind*;

*One row per registrant and election with the election name stored as a string in elec_yt;
reshape long to_, i(reg_id) j(elec_yt) string;

*Rows without a turnout value are removed and then one indicator per election is created;
*The indicators are numbered in the sorted order of elec_yt which the labels below rely on;
drop if missing(to_);
tab elec_yt, gen(elec_yt_);

*Dropping elections that are not used to speed computation;
*Indicators 4 5 and 7 are the ones left out;
keep if inlist(1, elec_yt_1, elec_yt_2, elec_yt_3, elec_yt_6, elec_yt_8, elec_yt_9, elec_yt_10);

*Generate Interaction Between Election Cycle and Trump Approver;
*Generate Interaction Between Election Cycle and Voting in 2020 Rep. Primary;
#delimit cr
* tr_elec_yt_k is the election indicator times the approval indicator.
* ndem_tr_elec_yt_k is the same product forced to 0 for 2020 Democratic primary voters.
forvalues k = 1/10 {
	gen tr_elec_yt_`k' = elec_yt_`k' * pres_app_bin_ga_rv_extend
	gen ndem_tr_elec_yt_`k' = cond(dem_prim_2020 == 1, 0, tr_elec_yt_`k')
}

* Labels for the interaction terms, listed in indicator order 1 to 10.
local k = 0
foreach cycle in "2014 Gen." "2016 Gen." "2018 Gen." "2018 Prim." "2018 Runoff" "2020 Gen." "2020 Prim." "2021 Runoff" "2022 Gen." "2022 Runoff" {
	local ++k
	label variable ndem_tr_elec_yt_`k' "`cycle' $\times$ Trump Approver"
}

* Labels for the election indicators, listed in indicator order 1 to 10.
local k = 0
foreach cycle in "2014 Gen." "2016 Gen." "2018 Gen." "2018 Prim." "2018 Runoff" "2020 Gen." "2020 Prim." "2021 Runoff" "2022 General" "2022 Runoff" {
	local ++k
	label variable elec_yt_`k' "`cycle'"
}
#delimit ;

*Disabled step kept from the original script;
*drop tr_elec_yt_*;


	


#delimit cr
capture program drop get_star_coeff
program define get_star_coeff, rclass
	* Formats the estimate left in r() by the preceding command (this script
	* calls it right after lincom) as a string with a significance marker.
	*
	* Reads:   r(estimate), r(se), r(df)
	* Returns: r(tau_dat) = the estimate rounded to 5 decimals, followed by
	*          \sym{+}, \sym{*}, \sym{**} or \sym{***} for two-sided
	*          significance at the 0.10, 0.05, 0.01 and 0.001 levels,
	*          or by nothing when the estimate is not significant.

	local point_est = r(estimate)
	local est_se = r(se)
	local df = r(df)

	local t_stat = abs(`point_est'/`est_se')

	* Critical values come from the t distribution with r(df) degrees of freedom.
	* When r(df) is missing the normal distribution is used instead, so that a
	* missing df no longer suppresses every marker.
	local crit_val_point1   = cond(missing(`df'), invnormal(0.95),   invttail(`df', 0.05))
	local crit_val_point05  = cond(missing(`df'), invnormal(0.975),  invttail(`df', 0.025))
	local crit_val_point01  = cond(missing(`df'), invnormal(0.995),  invttail(`df', 0.005))
	local crit_val_point001 = cond(missing(`df'), invnormal(0.9995), invttail(`df', 0.0005))

	* A missing t statistic (missing or zero standard error) gets no marker.
	* Without this guard a missing value compares as larger than every
	* critical value and would be given three stars.
	local stars ""
	if !missing(`t_stat') {
		if `t_stat' >= `crit_val_point001' {
			local stars "\sym{***}"
		}
		else if `t_stat' >= `crit_val_point01' {
			local stars "\sym{**}"
		}
		else if `t_stat' >= `crit_val_point05' {
			local stars "\sym{*}"
		}
		else if `t_stat' > `crit_val_point1' {
			local stars "\sym{+}"
		}
	}

	return local tau_dat = string(round(`point_est', .00001)) + "`stars'"

end





#delimit ;

*Model 1 - turnout in the 2020 general and the 2021 and 2022 elections with individual fixed effects;

*Start from an empty set of stored estimates so that models stored earlier in the session cannot end up as extra columns in the esttab table;
eststo clear;

*Estimation sample is indicators 6 8 9 and 10 and indicator 6 is the omitted election;
gen elec_20_22 = 1 if elec_yt_6 == 1 | elec_yt_8 == 1 | elec_yt_9 == 1 | elec_yt_10 == 1;
gen sample_inclusion = 1 if elec_20_22 == 1;

local elec_cyc_fes elec_yt_8 elec_yt_9 elec_yt_10 ;

eststo: reghdfe to_ ndem_tr_elec_yt_10 ndem_tr_elec_yt_9 ndem_tr_elec_yt_8  `elec_cyc_fes' if sample_inclusion == 1,  
absorb(reg_id) vce(cluster reg_id ) level(95);

*Extra rows for the results table;
estadd local num_indiv =  e(N_clust);
estadd local indiv_FE "Yes";
estadd local samp_ex "No";
estadd local bal_panel "No";
estadd local county_elec_FE "No";

*Difference between the indicator 8 and indicator 10 interaction coefficients;
*The standard error is stored in parentheses and get_star_coeff turns the lincom results into the starred estimate;
lincom ndem_tr_elec_yt_8 - ndem_tr_elec_yt_10; 
estadd local se_sum_effect = "("+string(round(r(se), .000001))+ ")";
get_star_coeff;
estadd local sum_effect = r(tau_dat);

*The sample flags are rebuilt for the next set of models;
drop elec_20_22  sample_inclusion;

#delimit ;

*Models 2 to 4 - general elections from 2014 to 2022 plus the 2021 and 2022 runoffs;
*Sample is indicators 1 2 3 6 8 9 and 10 and indicator 6 is the omitted election;
gen gen_elec_spec = 1 if elec_yt_1 == 1 | elec_yt_2 == 1 | elec_yt_3 == 1 | elec_yt_6 == 1 | elec_yt_8 == 1 | elec_yt_9 == 1 | elec_yt_10 == 1;

gen sample_inclusion = 1 if gen_elec_spec == 1;

*Election indicators are listed by name because a hyphen range depends on the order of variables in the dataset;
local genelec_cycle_fes elec_yt_1 elec_yt_2 elec_yt_8 elec_yt_3 elec_yt_9 elec_yt_10;

*Model 2 - individual fixed effects on the full sample;
eststo: reghdfe to_  ndem_tr_elec_yt_10 ndem_tr_elec_yt_9 ndem_tr_elec_yt_8 ndem_tr_elec_yt_3 ndem_tr_elec_yt_2 ndem_tr_elec_yt_1  `genelec_cycle_fes'  if sample_inclusion == 1,  
absorb(reg_id) vce(cluster reg_id ) level(95);
estadd local num_indiv =  e(N_clust);
estadd local indiv_FE "Yes";
estadd local samp_ex "No";
estadd local bal_panel "No";
estadd local county_elec_FE "No";

*Difference between the indicator 8 and indicator 10 interaction coefficients with its standard error and starred estimate;
lincom ndem_tr_elec_yt_8 - ndem_tr_elec_yt_10; 
estadd local se_sum_effect = "("+string(round(r(se), .000001))+ ")";
get_star_coeff;
estadd local sum_effect = r(tau_dat);

*Model 3 - same as model 2 but leaving out registrants flagged by only_2016g_2020g;
eststo: reghdfe to_  ndem_tr_elec_yt_10 ndem_tr_elec_yt_9 ndem_tr_elec_yt_8 ndem_tr_elec_yt_3 ndem_tr_elec_yt_2 ndem_tr_elec_yt_1  `genelec_cycle_fes'  if sample_inclusion == 1 & only_2016g_2020g != 1,  
absorb(reg_id) vce(cluster reg_id ) level(95);
estadd local num_indiv =  e(N_clust);
estadd local indiv_FE "Yes";
estadd local samp_ex "Yes";
estadd local bal_panel "No";
estadd local county_elec_FE "No";

lincom ndem_tr_elec_yt_8 - ndem_tr_elec_yt_10; 
estadd local se_sum_effect = "("+string(round(r(se), .000001))+ ")";
get_star_coeff;
estadd local sum_effect = r(tau_dat);

*Model 4 - same sample as model 3 with county by election indicator interactions absorbed in place of the election indicators;
eststo: reghdfe to_  ndem_tr_elec_yt_10 ndem_tr_elec_yt_9 ndem_tr_elec_yt_8 ndem_tr_elec_yt_3 ndem_tr_elec_yt_2 ndem_tr_elec_yt_1   if sample_inclusion == 1 & only_2016g_2020g != 1,  
absorb(reg_id i.county_code#i.elec_yt_1 i.county_code#i.elec_yt_2 i.county_code#i.elec_yt_3 i.county_code#i.elec_yt_8 i.county_code#i.elec_yt_9 i.county_code#i.elec_yt_10 ) vce(cluster reg_id ) level(95);

estadd local num_indiv =  e(N_clust);
estadd local indiv_FE "Yes";
estadd local samp_ex "Yes";
estadd local bal_panel "No";
estadd local county_elec_FE "Yes";

lincom ndem_tr_elec_yt_8 - ndem_tr_elec_yt_10; 
estadd local se_sum_effect = "("+string(round(r(se), .000001))+ ")";
get_star_coeff;
estadd local sum_effect = r(tau_dat);

*Write every stored model to one LaTeX table with the added rows listed in scalars;
esttab using FE_2022_Elec_Models_PB.tex, 
note("Robust standard errors clustered at individual level reported in parentheses")  
label se star(+ 0.10 * 0.05 ** 0.01 *** 0.001) mlabels("" "" "" "") r2 obslast
keep(ndem_tr_elec_yt_10 ndem_tr_elec_yt_9 ndem_tr_elec_yt_8 ndem_tr_elec_yt_3 ndem_tr_elec_yt_2 ndem_tr_elec_yt_1 elec_yt_10 elec_yt_9 elec_yt_8 elec_yt_3 elec_yt_2 elec_yt_1)
order(ndem_tr_elec_yt_10 ndem_tr_elec_yt_9 ndem_tr_elec_yt_8 ndem_tr_elec_yt_3 ndem_tr_elec_yt_2 ndem_tr_elec_yt_1 elec_yt_10 elec_yt_9 elec_yt_8 elec_yt_3 elec_yt_2 elec_yt_1)
title("Prob. Turnout Including 2022 Runoff \label{FE2022ElecModelsPB}") replace  scalars( "indiv_FE Individual FEs" 
 "samp_ex Ex. Registrants"  "county_elec_FE County by Election FE" "num_indiv Num. Indivs." "sum_effect Diff. of Coeffs" "se_sum_effect SE Diff. of Coeffs" );
